import logging
from init import PATHS
LOGGER = logging.getLogger(__name__)
import numpy as np
import pandas as pd


def main():
	"""Recompute the IEA budget data in 2018 PPP terms and export it as a wide CSV.

	Steps:
	  1. Load the raw IEA table with ``read_IEA``.
	  2. Convert the three flows of interest with ``make_IEA_data``.
	  3. Fold flow '1311VBAT' into 'OTHERPANDS' and re-sum per
	     (COUNTRY, FLOW, YEAR).
	  4. Pivot to one row per (FLOW, YEAR) and one column per COUNTRY, then
	     write the result under ``PATHS.dropbox``.
	"""
	selected_flows = ['HGENCELL', 'OTHERPANDS', '1311VBAT']
	converted = make_IEA_data(read_IEA(), selected_flows)

	# OTHERPANDS and 1311VBAT are reported together: relabel, then aggregate.
	converted['FLOW'] = converted['FLOW'].replace({'1311VBAT': 'OTHERPANDS'})
	totals = converted.groupby(['COUNTRY', 'FLOW', 'YEAR'], as_index=False)['VALUE_2018_PPP'].sum()

	# totals holds exactly COUNTRY, FLOW, YEAR, VALUE_2018_PPP at this point.
	wide = totals.pivot(index=['FLOW', 'YEAR'], columns='COUNTRY', values='VALUE_2018_PPP')

	output_file = PATHS.dropbox / 'Data_outputted/D_Policy/IEA_PPP_2018_recomputedbyMD.csv'
	wide.to_csv(output_file)


def read_IEA(replace_zeros=True):
	"""Load the whitespace-separated IEA country budget file as a DataFrame.

	The file ``COUNTRY_BUDGETS.TXT`` under ``PATHS.IEA`` is read without a
	header row and its first five columns are named COUNTRY, PRODUCT, FLOW,
	YEAR and VALUE.

	Parameters
	----------
	replace_zeros : bool, default True
		When true, every '..' entry in VALUE is replaced by '0' and the
		column is cast to float ('..' is presumably the source's marker for
		unavailable figures -- confirm against the IEA documentation).
		When false, VALUE is returned exactly as parsed.

	Returns
	-------
	pandas.DataFrame
	"""
	column_names = {0: 'COUNTRY', 1: 'PRODUCT', 2: 'FLOW', 3: 'YEAR', 4: 'VALUE'}
	raw = pd.read_csv(PATHS.IEA / 'COUNTRY_BUDGETS.TXT', sep=r'\s+', header=None)
	budgets = raw.rename(columns=column_names)
	if not replace_zeros:
		return budgets
	budgets['VALUE'] = budgets['VALUE'].replace('..', '0').astype(float)
	return budgets


def make_IEA_data(IEA, flows):
	"""Express selected IEA budget rows in 2018 PPP-adjusted terms.

	The input is restricted to the focus countries, to products 'GOVTRD' and
	'GOVTDEMO', to the requested ``flows`` and to years from 1995 onward, then
	VALUE is summed per (COUNTRY, FLOW, YEAR). Each row is matched with the
	yearly PPP conversion factor of its country (``PPP_conversion_WB.csv``)
	and with a deflator rebased so that 2018 equals 1
	(``GDP_US_deflator_WB.csv``), both read from ``PATHS.IEA``.

	Parameters
	----------
	IEA : pandas.DataFrame
		Must contain COUNTRY, PRODUCT, FLOW, YEAR and VALUE columns, e.g. the
		output of ``read_IEA``. YEAR must be integer-typed to merge correctly.
	flows : list-like
		FLOW codes to keep.

	Returns
	-------
	pandas.DataFrame
		Columns COUNTRY, FLOW, YEAR, VALUE, annual_ppp_factor, base2018 and
		VALUE_2018_PPP, where
		``VALUE_2018_PPP = VALUE / annual_ppp_factor * base2018``.

	Raises
	------
	ValueError
		If the deflator file does not contain exactly one row for 2018.
	"""
	focus_countries = ['FRANCE', 'GERMANY', 'JAPAN', 'KOREA', 'UK', 'USA']  ## add UK and USA from the third sheet
	keep = (
		IEA['COUNTRY'].isin(focus_countries)
		& IEA['PRODUCT'].isin(['GOVTRD', 'GOVTDEMO'])
		& IEA['FLOW'].isin(flows)
		& (IEA['YEAR'] >= 1995)
	)
	IEA = IEA[keep].groupby(['COUNTRY', 'FLOW', 'YEAR'])['VALUE'].sum().reset_index()

	# PPP conversion factors: wide (one column per year) -> long.
	ppp = pd.read_csv(PATHS.IEA / 'PPP_conversion_WB.csv')
	ppp_countries = ['France', 'Germany', 'Japan', 'Korea, Rep.', 'United States', 'United Kingdom']
	year_columns = [str(year) for year in range(1990, 2021)]
	ppp = ppp.loc[ppp['Country Name'].isin(ppp_countries), ['Country Name'] + year_columns]
	ppp_factors = pd.melt(ppp, id_vars=['Country Name'], var_name='YEAR', value_name='annual_ppp_factor')
	ppp_factors = ppp_factors.rename(columns={'Country Name': 'COUNTRY'})
	ppp_factors['YEAR'] = ppp_factors['YEAR'].astype('int')
	# Assign the result back instead of replacing in place on a column
	# selection: the in-place form acts on a temporary object and leaves the
	# frame unchanged under pandas Copy-on-Write, which would break the merge
	# for UK, USA and KOREA.
	ppp_factors['COUNTRY'] = ppp_factors['COUNTRY'].replace(
		{'United Kingdom': 'UK', 'United States': 'USA', 'Korea, Rep.': 'Korea'}
	)
	ppp_factors['COUNTRY'] = ppp_factors['COUNTRY'].str.upper()  # match the IEA spelling
	# NOTE(review): only 1990-2020 factors are loaded, so IEA rows for later
	# years come out of this left merge with a NaN annual_ppp_factor.
	IEA_ppp = IEA.merge(ppp_factors, on=['COUNTRY', 'YEAR'], how='left')

	# Deflator rebased to 2018: base2018 = deflator(2018) / deflator(year).
	deflator = pd.read_csv(PATHS.IEA / 'GDP_US_deflator_WB.csv')
	deflator = pd.melt(deflator, id_vars=['Country Name', 'Country Code'], var_name='YEAR', value_name='deflator')
	deflator['YEAR'] = deflator['YEAR'].astype('int')
	# .item() extracts the single 2018 value (and raises if there is not
	# exactly one); float() on a 1-d array is deprecated in NumPy.
	deflator_2018 = float(deflator.loc[deflator['YEAR'] == 2018, 'deflator'].item())
	deflator['base2018'] = (deflator['deflator'] ** (-1)).mul(deflator_2018)

	IEA_ppp_2018 = IEA_ppp.merge(deflator[['YEAR', 'base2018']], on='YEAR', how='left')
	IEA_ppp_2018['VALUE_2018_PPP'] = IEA_ppp_2018['VALUE'] / IEA_ppp_2018['annual_ppp_factor'] * IEA_ppp_2018['base2018']
	return IEA_ppp_2018

